// Title      : SampleCorpus.java
// Author     : James Baird
// Created    : Sunday, 4th November 2001
// Description: SampleCorpus Class

import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;
import java.util.Vector;

/**
 * Command-line tool that draws a random sample of posts, of the same size for
 * every structure, from a corpus and writes one text file per structure.
 */
public class SampleCorpus
{
  /**
   * Entry point. Expects three arguments: the corpus text path, the XML path
   * and the requested sample size per structure, e.g.
   * c:\psychology\mphil\corpus2, c:\psychology\mphil\corpus2xml, 14
   *
   * An IOException raised while sampling is reported via its stack trace.
   *
   * @param args the command-line arguments described above
   * @throws IllegalArgumentException if fewer than three arguments are given
   *         (a non-numeric size surfaces as a NumberFormatException)
   */
  public static void main(String[] args)
  {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 3)
      throw new IllegalArgumentException(
        "Usage: java SampleCorpus <txtfilepath> <xmlfilepath> <size>");

    try
    {
      sampleCorpus(args[0], args[1], Integer.parseInt(args[2]));
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
  }

  /**
   * Lists every structure of the corpus with its post count, then writes a
   * random sample of posts for each structure to "<structure>.txt" in the
   * current working directory.
   *
   * If any structure holds fewer posts than the requested size, the size is
   * lowered to that smallest post count so that every structure contributes
   * the same number of posts.
   *
   * @param txtfilepath path handed to the Corpus constructor
   * @param xmlfilepath path handed unchanged to Post.getPotentialReplies
   * @param size        requested number of posts to sample per structure
   * @throws IOException if an output file cannot be created or written
   */
  protected static void sampleCorpus(String txtfilepath, String xmlfilepath, int size) throws IOException
  {
    System.out.println("Scanning corpus...");

    Corpus corpus = new Corpus(txtfilepath);

    Vector structures = corpus.getStructures();

    System.out.println(structures.size() + " structure(s) detected");

    // Single pass: report each structure and track the smallest post count.
    int minPosts = Integer.MAX_VALUE;

    for (int i = 0; i < structures.size(); i++)
    {
      String structure = (String)structures.elementAt(i);

      int postCount = corpus.getPosts(structure).size();

      System.out.println("  " + structure + "(" + postCount + ")");

      if (postCount < minPosts)
        minPosts = postCount;
    }

    if (minPosts < size)
    {
      size = minPosts;

      System.out.println("Size adjusted down to " + minPosts);
    }

    // One generator shared by all structures, as a single random stream.
    Random random = new Random();

    for (int i = 0; i < structures.size(); i++)
      writeSample(corpus, (String)structures.elementAt(i), xmlfilepath, size, random);
  }

  /**
   * Writes the header and a random sample of distinct posts for one structure
   * to "<structure>.txt", closing the file even when writing fails.
   */
  private static void writeSample(Corpus corpus, String structure, String xmlfilepath,
                                  int size, Random random) throws IOException
  {
    String newline = System.getProperty("line.separator");

    FileWriter outfile = new FileWriter(structure + ".txt");

    try
    {
      outfile.write("*** STRUCTURE " + structure + newline);

      outfile.write(newline);

      Vector posts = corpus.getPosts(structure);

      // Elements default to false, so no explicit clearing is required.
      boolean[] used = new boolean[posts.size()];

      // Never ask for more distinct posts than exist; the retry loop below
      // could not terminate otherwise.
      int wanted = Math.min(size, posts.size());

      for (int j = 0; j < wanted; j++)
      {
        // Redraw until an index that has not been sampled yet comes up.
        int p = random.nextInt(posts.size());
        while (used[p])
          p = random.nextInt(posts.size());

        used[p] = true;

        Post post = (Post)posts.elementAt(p);

        // NOTE(review): presumably writes the post's candidate replies to
        // outfile using data found under xmlfilepath - confirm in Post.
        post.getPotentialReplies(outfile, xmlfilepath);
      }

      outfile.flush();
    }
    finally
    {
      // Release the file handle on both the normal and the failure path.
      outfile.close();
    }
  }
}
